library(dplyr)
library(stringr)
library(psych)
library(ggplot2)

# Point R at the folder that holds the data file. The string below is a
# placeholder and has to be replaced with a real path before running.
setwd("Your working directory")
# Restore the objects stored in the .rda file into the workspace; the code
# below expects this to provide the data frame `PFP.Anonymized`.
load(file='PFP.Anonymized.rda')


# 1. Duplicated Treatments
# 1.1 Assigning Treatment ID (# 127)
# Names of the 48 profile-attribute columns, F.<scenario>.<job>.<attribute>,
# generated in the order F.1.1.1, ..., F.1.1.8, F.1.2.1, ..., F.3.2.8.
treatment.cols <- paste0(
  "F.",
  rep(1:3, each = 16), ".",
  rep(rep(1:2, each = 8), times = 3), ".",
  rep(1:8, times = 6)
)

# One row per distinct combination of the 48 attribute values. Combinations
# are numbered in order of first appearance and the number is kept as a factor.
Treatment_ID <- PFP.Anonymized[, treatment.cols] %>%
  distinct() %>%
  mutate(Treatment.ID = factor(row_number()))

# Attach the treatment number to every respondent row (left join on all 48
# attribute columns), then print missing-value counts and the ID frequencies.
PFP.ID.Dup <- merge(
  x = PFP.Anonymized,
  y = Treatment_ID,
  by = treatment.cols,
  all.x = TRUE
)
colSums(is.na(PFP.ID.Dup))
table(PFP.ID.Dup$Treatment.ID, exclude = NULL)

# Move the identifying / survey-metadata columns to the front, keep every
# other column after them, and sort the rows by respondent ID.
PFP.Treat.ID <- PFP.ID.Dup %>%
  dplyr::select(
    ID, Treatment.ID, Study.ID,
    StartDate, EndDate,
    Status, Progress, Duration..in.seconds., Finished,
    RecordedDate, DistributionChannel, UserLanguage, IC,
    everything()
  ) %>%
  arrange(ID)

# 1.2 Time Differences of Duplicated Treatments
# Parse the survey start time. ":00" is appended so the string matches the
# "%H:%M:%S" format (presumably StartDate is recorded to the minute -- confirm).
PFP.Time <- PFP.Treat.ID %>%
  mutate(
    StartTime = paste0(StartDate, ":00"),
    StartTime1 = as.POSIXct(StartTime, format = "%m/%d/%Y %H:%M:%S")
  )
str(PFP.Time$StartTime1)

# Latest start time within each treatment ...
PFP.Time1 <- PFP.Time %>%
  group_by(Treatment.ID) %>%
  slice(which.max(StartTime1)) %>%
  dplyr::select(StartTime1)
# ... and the earliest one.
PFP.Time2 <- PFP.Time %>%
  group_by(Treatment.ID) %>%
  slice(which.min(StartTime1)) %>%
  dplyr::select(StartTime1)

# After the merge, StartTime1.x is the latest and StartTime1.y the earliest
# start per treatment; their difference is the within-treatment time gap.
PFP.Time3 <- merge(x = PFP.Time1, y = PFP.Time2, by = "Treatment.ID", all.x = TRUE)
PFP.Time4 <- mutate(PFP.Time3, MaxMinTimeGap = StartTime1.x - StartTime1.y)
PFP.Time5 <- dplyr::select(PFP.Time4, Treatment.ID, MaxMinTimeGap)

# Attach the gap to every respondent row, put the metadata columns first
# (with the gap right after the dates) and sort by respondent ID.
PFP.Treat.ID.Time <- merge(
  x = PFP.Treat.ID,
  y = PFP.Time5,
  by = "Treatment.ID",
  all.x = TRUE
) %>%
  dplyr::select(
    ID, Treatment.ID, Study.ID,
    StartDate, EndDate, MaxMinTimeGap,
    Status, Progress, Duration..in.seconds., Finished,
    RecordedDate, DistributionChannel, UserLanguage, IC,
    everything()
  ) %>%
  arrange(ID)


# 2. Self-Efficacy & PSM PCA
# Both scales are answered with the same five agreement labels, so a single
# helper performs the recoding for every item.

# Convert five-point agreement labels to the scores 1-5.
#   responses: vector of answer labels (character or factor).
#   returns:   numeric vector of the same length. NA stays NA; any non-missing
#              label outside the five known ones is coded 99 and reported with
#              a warning, because a 99 would distort the reliability and PCA
#              results computed below.
recode_agreement <- function(responses) {
  agreement_scores <- c(
    "Strongly disagree" = 1,
    "Disagree" = 2,
    "Neither agree nor disagree" = 3,
    "Agree" = 4,
    "Strongly agree" = 5
  )
  labels <- as.character(responses)
  scores <- unname(agreement_scores[match(labels, names(agreement_scores))])
  unknown <- is.na(scores) & !is.na(labels)
  if (any(unknown)) {
    warning(sum(unknown), " response(s) with an unrecognised label were coded 99",
            call. = FALSE)
  }
  scores[unknown] <- 99
  scores
}

# 2.1 Self-Efficacy
# Numeric versions SE1..SE8 of the items SelfEfficacy_1..SelfEfficacy_8,
# appended after the existing columns.
se_items <- paste0("SE", 1:8)
PFP.Choice.Efficacy <- PFP.Treat.ID.Time
PFP.Choice.Efficacy[se_items] <- lapply(
  PFP.Treat.ID.Time[paste0("SelfEfficacy_", 1:8)],
  recode_agreement
)
# Frequency table per item (missing values included) to check the coding.
lapply(PFP.Choice.Efficacy[se_items], table, exclude = NULL)

library(ltm)
# The items are selected by name rather than by column position, so the
# analysis cannot silently pick up other columns if the input layout changes.
cronbach.alpha(PFP.Choice.Efficacy[, se_items])
Efficacy.PCA <- principal(PFP.Choice.Efficacy[, se_items], nfactors = 1, rotate = "varimax")
Efficacy.PCA.Score <- data.frame(Efficacy.PCA$scores)
Efficacy.PCA$loadings

# Attach the single component score (column PC1) as Efficacy_Factor.
PFP.Choice.Efficacy.Score <- cbind(PFP.Choice.Efficacy, Efficacy.PCA.Score) %>%
  rename(Efficacy_Factor = PC1)
summary(PFP.Choice.Efficacy.Score$Efficacy_Factor)

# Split at zero: scores above 0 are "High", all others "Low".
PFP.Choice.Efficacy.Score.C <- PFP.Choice.Efficacy.Score %>%
  mutate(Efficacy_C = factor(ifelse(Efficacy_Factor > 0, "High", "Low")))
colSums(is.na(PFP.Choice.Efficacy.Score.C))

# 2.2 PSM
# Numeric versions PSM1..PSM5 of the items PSM_1..PSM_5 (PSM_6 is not recoded).
psm_items <- paste0("PSM", 1:5)
PFP.Choice.PSM <- PFP.Choice.Efficacy.Score.C
PFP.Choice.PSM[psm_items] <- lapply(
  PFP.Choice.Efficacy.Score.C[paste0("PSM_", 1:5)],
  recode_agreement
)
lapply(PFP.Choice.PSM[psm_items], table, exclude = NULL)

cronbach.alpha(PFP.Choice.PSM[, psm_items])
PSM.PCA <- principal(PFP.Choice.PSM[, psm_items], nfactors = 1, rotate = "varimax")
PSM.PCA.Score <- data.frame(PSM.PCA$scores)
PSM.PCA$loadings

# Attach the single component score (column PC1) as PSM_Factor.
PFP.Choice.PSM.Score <- cbind(PFP.Choice.PSM, PSM.PCA.Score) %>%
  rename(PSM_Factor = PC1)
summary(PFP.Choice.PSM.Score$PSM_Factor)

# Same zero split as for self-efficacy.
PFP.Choice.PSM.Score.C <- PFP.Choice.PSM.Score %>%
  mutate(PSM_C = factor(ifelse(PSM_Factor > 0, "High", "Low")))
colSums(is.na(PFP.Choice.PSM.Score.C))


# 3. Risk Aversion
# Inspect the raw answers before recoding.
table(PFP.Choice.PSM.Score.C$GeneralRisk, exclude = NULL)
table(PFP.Choice.PSM.Score.C$LotteryRisk, exclude = NULL)
summary(PFP.Choice.PSM.Score.C$LotteryRisk)

PFP.Choice.Risk <- PFP.Choice.PSM.Score.C %>%
  mutate(
    # Work on the labels as text: ifelse() drops factor attributes, so a
    # factor-typed GeneralRisk would otherwise fall through as level codes.
    GeneralRiskNum = as.character(GeneralRisk),
    # The two labelled end points become "0" and "10"; every other answer is
    # passed through unchanged and converted to a number below.
    GeneralRiskNum = ifelse(
      GeneralRiskNum == "Unwilling to take risks  0", "0",
      ifelse(GeneralRiskNum == "Fully prepared to take risk  10", "10", GeneralRiskNum)
    ),
    GeneralRiskNum = as.numeric(GeneralRiskNum),
    # Three bands: above 6 is high, below 4 is low, 4-6 is medium.
    GeneralRiskC = factor(ifelse(
      GeneralRiskNum > 6, "High Risk",
      ifelse(GeneralRiskNum < 4, "Low Risk", "Medium Risk")
    )),
    # Binary split: 1 when above 5, otherwise 0.
    GeneralRiskB = ifelse(GeneralRiskNum > 5, 1, 0),
    # Three bands: 20 or more is high, 5 or less is low, in between is medium.
    LotteryRiskC = factor(ifelse(
      LotteryRisk >= 20, "High Risk",
      ifelse(LotteryRisk <= 5, "Low Risk", "Medium Risk")
    )),
    # Binary split: 1 when above 10, otherwise 0.
    LotteryRiskB = ifelse(LotteryRisk > 10, 1, 0)
  )

# Diagnostics on the recoded variables.
colSums(is.na(PFP.Choice.Risk))
table(PFP.Choice.Risk$GeneralRiskC, exclude = NULL)
table(PFP.Choice.Risk$LotteryRiskC, exclude = NULL)
summary(PFP.Choice.Risk$GeneralRiskNum)
table(PFP.Choice.Risk$GeneralRiskB, exclude = NULL)
table(PFP.Choice.Risk$LotteryRiskB, exclude = NULL)


# 4. Race/Gender/Age
# Inspect the raw demographic answers.
summary(PFP.Choice.Risk$Age)
table(PFP.Choice.Risk$Race, exclude = NULL)
table(PFP.Choice.Risk$Gender, exclude = NULL)

PFP.Choice.Race <- PFP.Choice.Risk %>%
  mutate(
    # Detailed grouping. The explicit NA branch keeps missing answers missing
    # instead of letting them fall into the catch-all "Other" category.
    Race_C = factor(case_when(
      is.na(Race) ~ NA_character_,
      Race == "White" ~ "White",
      Race %in% c("Hispanic, Latino, or Spanish",
                  "White,Hispanic, Latino, or Spanish") ~ "Hispanic",
      Race == "Black or African American" ~ "Black",
      Race == "Asian" ~ "Asian",
      Race == "Prefer not to answer" ~ "Prefer not to answer",
      TRUE ~ "Other"
    )),
    # Coarse grouping: White, Prefer not to answer, or Minority for the rest.
    Race_B = factor(case_when(
      is.na(Race) ~ NA_character_,
      Race == "White" ~ "White",
      Race == "Prefer not to answer" ~ "Prefer not to answer",
      TRUE ~ "Minority"
    )),
    Gender = factor(Gender),
    # 1 when older than 31, otherwise 0 (NA stays NA).
    Age_B = as.numeric(Age > 31)
  )
table(PFP.Choice.Race$Race_B)
table(PFP.Choice.Race$Race_C)
table(PFP.Choice.Race$Gender)
table(PFP.Choice.Race$Age_B)

# Make "White" and "Male" the first (reference) factor levels.
PFP.Choice.Race$Race_B <- relevel(PFP.Choice.Race$Race_B, ref = "White")
PFP.Choice.Race$Race_C <- relevel(PFP.Choice.Race$Race_C, ref = "White")
PFP.Choice.Race$Gender <- relevel(PFP.Choice.Race$Gender, ref = "Male")


# 5. Cleaning & Restructuring
# Build a one-column data frame that holds the final character of every value
# in `values`, stored under `column_name`.
last_char_column <- function(values, column_name) {
  setNames(data.frame(str_sub(values, -1)), column_name)
}

# One extracted column per scenario (1-3) and job rating (1-2); the new
# columns carry the source column's name with an "f" suffix.
PFP.Com_Scale_1_1 <- last_char_column(PFP.Choice.Race$Scenario1Job12_1, "Scenario1Job12_1f")
PFP.Com_Scale_1_2 <- last_char_column(PFP.Choice.Race$Scenario1Job12_2, "Scenario1Job12_2f")
PFP.Com_Scale_2_1 <- last_char_column(PFP.Choice.Race$Scenario2Job12_1, "Scenario2Job12_1f")
PFP.Com_Scale_2_2 <- last_char_column(PFP.Choice.Race$Scenario2Job12_2, "Scenario2Job12_2f")
PFP.Com_Scale_3_1 <- last_char_column(PFP.Choice.Race$Scenario3Job12_1, "Scenario3Job12_1f")
PFP.Com_Scale_3_2 <- last_char_column(PFP.Choice.Race$Scenario3Job12_2, "Scenario3Job12_2f")

# Append the six extracted columns to the respondent data and count missing
# values per column.
PFP.Com_Scale <- cbind(
  PFP.Choice.Race,
  PFP.Com_Scale_1_1, PFP.Com_Scale_1_2,
  PFP.Com_Scale_2_1, PFP.Com_Scale_2_2,
  PFP.Com_Scale_3_1, PFP.Com_Scale_3_2
)
colSums(is.na(PFP.Com_Scale))

# Build the rows for one job profile of one choice scenario.
#   data:    wide data frame holding one row per respondent (PFP.Com_Scale).
#   contest: scenario number (1, 2 or 3).
#   profile: job shown within the scenario; 1 corresponds to the answer
#            "Job A", 2 to "Job B".
#   returns: data frame with the respondent-level columns, the eight profile
#            attributes under descriptive names, and the outcome columns
#            Contest_no, Profile_no, Chosen_Job, Chosen_Job_Scale and
#            Chosen_Job_Scale_binary, in that order.
build_choice_profile <- function(data, contest, profile) {
  leading_cols <- c(
    "ID", "Treatment.ID", "Study.ID", "StartDate", "EndDate", "MaxMinTimeGap",
    "Status", "Progress", "Duration..in.seconds.", "Finished", "RecordedDate",
    "DistributionChannel", "UserLanguage"
  )
  respondent_cols <- c(
    "GeneralRisk", "LotteryRisk", "GeneralRiskNum", "GeneralRiskB",
    "GeneralRiskC", "LotteryRiskB", "LotteryRiskC",
    paste0("SelfEfficacy_", 1:8),
    paste0("SE", 1:8), "Efficacy_Factor", "Efficacy_C",
    paste0("PSM_", 1:6),
    paste0("PSM", 1:5), "PSM_Factor", "PSM_C",
    "Age", "Gender", "Education", "State", "Employment", "Income",
    "Sector1", "Sector1_7_TEXT", "Sector2", "Sector2_7_TEXT",
    "PublicSector", "Party", "Party_3_TEXT", "Race", "Honesty",
    "Race_B", "Race_C", "Age_B"
  )
  attribute_names <- c(
    "Total pay", "Performance bonuses", "Job performance evaluation",
    "Current community involvement", "Community income",
    "Community demographics", "Overtime work", "Key job task"
  )

  choice_col <- paste0("Scenario", contest, "Choice")
  scale_col <- paste0("Scenario", contest, "Job12_", profile, "f")
  attribute_cols <- paste0("F.", contest, ".", profile, ".", 1:8)

  # Columns are picked in their final order, so the scenario-specific names
  # can be replaced by the generic ones positionally.
  out <- data[, c(leading_cols, choice_col, respondent_cols, attribute_cols),
              drop = FALSE]
  names(out) <- c(leading_cols, "ScenarioChoice", respondent_cols, attribute_names)

  out$Contest_no <- rep(as.numeric(contest), nrow(out))
  out$Profile_no <- rep(as.numeric(profile), nrow(out))

  # 1 when this profile's job was picked, 0 when the other job was picked,
  # 99 for any other non-missing answer.
  job_labels <- c("Job A", "Job B")
  own_job <- job_labels[profile]
  other_job <- job_labels[3 - profile]
  out$Chosen_Job <- ifelse(
    out$ScenarioChoice == own_job, 1,
    ifelse(out$ScenarioChoice == other_job, 0, 99)
  )

  # The rating is kept as extracted (text). The binary split is computed on
  # its numeric value: comparing the text itself with 4 would be a string
  # comparison, which turns an empty rating into 0 instead of NA.
  rating <- data[[scale_col]]
  out$Chosen_Job_Scale <- rating
  out$Chosen_Job_Scale_binary <- ifelse(as.numeric(as.character(rating)) > 4, 1, 0)
  out
}

# One data frame per scenario (first digit) and job profile (second digit).
PFP.Choice11 <- build_choice_profile(PFP.Com_Scale, contest = 1, profile = 1)
PFP.Choice12 <- build_choice_profile(PFP.Com_Scale, contest = 1, profile = 2)
PFP.Choice21 <- build_choice_profile(PFP.Com_Scale, contest = 2, profile = 1)
PFP.Choice22 <- build_choice_profile(PFP.Com_Scale, contest = 2, profile = 2)
PFP.Choice31 <- build_choice_profile(PFP.Com_Scale, contest = 3, profile = 1)
PFP.Choice32 <- build_choice_profile(PFP.Com_Scale, contest = 3, profile = 2)

# Stack the six pieces into the long format: six rows per respondent.
PFP.Choice <- rbind(PFP.Choice11, PFP.Choice12, PFP.Choice21,
                    PFP.Choice22, PFP.Choice31, PFP.Choice32)
str(PFP.Choice)
colSums(is.na(PFP.Choice))

# Remove simple opening/closing markup tags such as <b> or </b> from text.
# The tag name is matched with [A-Za-z]: the range [A-z] would also cover the
# punctuation characters that sit between "Z" and "a" and depends on the
# locale's collation order.
strip_html_tags <- function(values) {
  gsub("[<][/]*[A-Za-z]+[>]", "", values)
}

PFP.Choice.Totalpay <- strip_html_tags(PFP.Choice$`Total pay`)
PFP.Choice.Performancebonuses <- strip_html_tags(PFP.Choice$`Performance bonuses`)
PFP.Choice.Jobperformanceevaluation <- strip_html_tags(PFP.Choice$`Job performance evaluation`)
PFP.Choice.Currentcommunityinvolvement <- strip_html_tags(PFP.Choice$`Current community involvement`)
PFP.Choice.Communityincome <- strip_html_tags(PFP.Choice$`Community income`)
PFP.Choice.Communitydemographics <- strip_html_tags(PFP.Choice$`Community demographics`)
PFP.Choice.Overtimework <- strip_html_tags(PFP.Choice$`Overtime work`)
PFP.Choice.Keyjobtask <- strip_html_tags(PFP.Choice$`Key job task`)

# Replace the eight raw attribute columns by their cleaned versions, stored as
# factors and placed after all other columns; the scenario and profile
# numbers become factors as well.
attribute_cols <- c(
  "Total pay", "Performance bonuses", "Job performance evaluation",
  "Current community involvement", "Community income",
  "Community demographics", "Overtime work", "Key job task"
)
PFP.Choice.Final <- PFP.Choice[, setdiff(names(PFP.Choice), attribute_cols),
                               drop = FALSE] %>%
  mutate(
    `Total pay` = factor(PFP.Choice.Totalpay),
    `Performance bonuses` = factor(PFP.Choice.Performancebonuses),
    `Job performance evaluation` = factor(PFP.Choice.Jobperformanceevaluation),
    `Current community involvement` = factor(PFP.Choice.Currentcommunityinvolvement),
    `Community income` = factor(PFP.Choice.Communityincome),
    `Community demographics` = factor(PFP.Choice.Communitydemographics),
    `Overtime work` = factor(PFP.Choice.Overtimework),
    `Key job task` = factor(PFP.Choice.Keyjobtask),
    Contest_no = factor(Contest_no),
    Profile_no = factor(Profile_no)
  )

# Check the coded outcome variables, missing values included.
table(PFP.Choice.Final$Chosen_Job, exclude = NULL)
table(PFP.Choice.Final$Chosen_Job_Scale, exclude = NULL)

# Two-level versions of the bonus and evaluation attributes: the fixed-salary
# and supervisor-evaluation levels map to "No ...", every other level to
# "Yes ...".
PFP.Choice.Final.Collapse <- PFP.Choice.Final %>%
  mutate(
    `Performance bonuses binary` = factor(ifelse(
      `Performance bonuses` == "No performance bonuses; fixed salary",
      "No (fixed salary)", "Yes (bonuses)"
    )),
    `Goal based evaluation` = factor(ifelse(
      `Job performance evaluation` == "A supervisor evaluation of your work",
      "No (supervisor based)", "Yes (goal based)"
    ))
  )
table(PFP.Choice.Final.Collapse$`Performance bonuses binary`, exclude = NULL)
table(PFP.Choice.Final.Collapse$`Goal based evaluation`, exclude = NULL)

# Reference (first) level for each attribute factor.
reference.levels <- c(
  `Total pay` = "Slightly below average",
  `Performance bonuses` = "No performance bonuses; fixed salary",
  `Performance bonuses binary` = "No (fixed salary)",
  `Job performance evaluation` = "A supervisor evaluation of your work",
  `Goal based evaluation` = "No (supervisor based)",
  `Current community involvement` = "Rare participation",
  `Community income` = "Low income",
  `Community demographics` = "Mostly white",
  `Overtime work` = "Never required",
  `Key job task` = "Analysis identifying community needs"
)
for (attribute.name in names(reference.levels)) {
  PFP.Choice.Final.Collapse[[attribute.name]] <- relevel(
    PFP.Choice.Final.Collapse[[attribute.name]],
    ref = reference.levels[[attribute.name]]
  )
}


# 6. Final Analytic Sample
# Name the finished data set, report missing values per column and write it
# to disk.
P4P_Clean <- PFP.Choice.Final.Collapse
colSums(is.na(P4P_Clean))
save(P4P_Clean, file = "P4P_Clean.rda")

